* Firm organization with multiple establishments
* Section V: Appendix Table D.5
* Non-directly affected establishments

* Start from an empty session and configure it for an unattended run
clear all 
set more off
set matsize 2000

* Close any log still open from an earlier run, then start this script's log
cap log close
log using log/21c_train_layer-firm_est-indirect_pretrends.log, replace

* Load the input data for this script
use data/train_analysis_firm.dta, clear

********************************************************************************
***	Data prep

* Keep rows flagged e_traindata == 1 with entryyear before 2006
keep if e_traindata == 1 & entryyear < 2006

***	Define interaction terms
* Route-by-calendar-year interactions: int_firm_<route>_y<year> =
* firm_ever_faster_<route> * d_year<year>.
* Only d_year2000 is created here; d_year2001-d_year2010 are assumed to exist
* in the input data already (TODO confirm against the upstream do-file).
gen d_year2000 = (jahr == 2000)
local routes early mid late any
foreach route of local routes {
	forvalues t = 2000/2010 {
		gen int_firm_`route'_y`t' = firm_ever_faster_`route' * d_year`t'
	}
}
* Drop one interaction per route: 2002 / 2004 / 2006 for early / mid / late
* (the years this script later maps to year_cent == 0) and 2000 for "any".
drop int_firm_any_y2000 int_firm_early_y2002 int_firm_mid_y2004 int_firm_late_y2006

* Remove stale tag variables before recreating them. Each pattern gets its own
* -capture drop-: with both patterns on one line, a pattern that matches no
* variable makes the whole -drop- fail, so nothing is dropped and the -egen-
* calls below abort because the variable already exists.
cap drop flg_est*
cap drop flg_unt*
egen flg_estjhr = tag(betnr jahr)	// one tagged row per betnr-jahr cell
egen flg_untjhr = tag(untid jahr)	// one tagged row per untid-jahr cell
egen flg_est = tag(betnr)		// one tagged row per betnr

* Last year in which each betnr is observed (after the restriction above)
bys betnr: egen max_year = max(jahr)

*** Restrict sample to indirectly affected establishments
keep if hauptbet == 0 & ever_faster1 == 0 & d_traindata == 1 & count_est > 2

***	County-level route flags
* ever_treat_cty_<route> is the maximum of firm_ever_faster_<route> over all
* remaining rows in the same ao_kreis, i.e. it is computed on the restricted
* sample, not on the full data.
foreach route of local routes {
	bys ao_kreis: egen ever_treat_cty_`route' = max(firm_ever_faster_`route')
}

save data/train_analysis_firm_pretrends_ind.dta, replace

********************************************************************************
***	Table D.5, bottom right
***	Pretrends, firms with at least two establishments, non-directly affected establishments

use data/train_analysis_firm_pretrends_ind.dta, clear

***	Build data set

* build_window: write one event-time window for the counties with a given
* combination of route flags. The data in memory are restored afterwards.
*   early mid late : required values of ever_treat_cty_early / _mid / _late
*   yearcond       : restriction on jahr, passed as one quoted expression
*   event          : year mapped to year_cent == 0; rows must also satisfy
*                    entryyear < event and max_year > event
*   route          : route whose county flag gates the year_cent assignment
*   stub           : output file is data/train_controls_<stub>-entry.dta
*   showyears      : optional; any non-empty value adds -tab jahr, m-
capture program drop build_window
program define build_window
	args early mid late yearcond event route stub showyears
	preserve
	keep if ever_treat_cty_early == `early' & ever_treat_cty_mid == `mid' & ever_treat_cty_late == `late'
	keep if `yearcond'
	keep if entryyear < `event' & max_year > `event'
	gen year_cent = .
	replace year_cent = jahr - `event' if ever_treat_cty_`route' == 1
	tab year_cent, m
	if "`showyears'" != "" {
		tab jahr, m
	}
	save data/train_controls_`stub'-entry.dta, replace
	restore
end

//	counties that are affected by routes 1 and 3:
//	assign 2000-2005 to FFM-CLG, 0 = 2002; assign 2003-2010 to ING-NUR, 0 = 2006
build_window 1 0 1 "jahr < 2006" 2002 early earlylate-early showyears
build_window 1 0 1 "jahr > 2002" 2006 late earlylate-late

//	counties that are affected by routes 2 and 3:
//	assign 2000-2005 to HH-BER, 0 = 2004; assign 2005-2010 to ING-NUR, 0 = 2006
build_window 0 1 1 "jahr < 2006" 2004 mid midlate-mid
build_window 0 1 1 "jahr > 2004" 2006 late midlate-late

//	counties that are affected by all routes:
//	assign 2000-2003 to FFM-CLG, 0 = 2002; assign 2003-2005 to HH-BER, 0 = 2004; assign 2005-2010 to ING-NUR, 0 = 2006
build_window 1 1 1 "jahr < 2004" 2002 early earlymidlate-early
build_window 1 1 1 "jahr > 2002 & jahr < 2006" 2004 mid earlymidlate-mid
build_window 1 1 1 "jahr > 2004" 2006 late earlymidlate-late

* The helper is only needed for the seven calls above
program drop build_window

//	counties that are affected by only one route
* Each branch pairs one route's county flag with that route's event year.
* The late-only branch carries no entryyear condition of its own; the initial
* sample restriction at the top of this file already imposes entryyear < 2006.
keep if (ever_treat_cty_early == 1 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 0 & entryyear < 2002 & max_year > 2002) ///
	| (ever_treat_cty_early == 0 & ever_treat_cty_mid == 1 & ever_treat_cty_late == 0 & entryyear < 2004 & max_year > 2004) ///
	| (ever_treat_cty_early == 0 & ever_treat_cty_mid == 0 & ever_treat_cty_late == 1 & max_year > 2006)

* Centre calendar time on the route's event year (2002 / 2004 / 2006)
gen year_cent = .
local routes early mid late
local events 2002 2004 2006
forvalues i = 1/3 {
	local route : word `i' of `routes'
	local event : word `i' of `events'
	tab jahr if ever_treat_cty_`route' == 1
	replace year_cent = jahr - `event' if ever_treat_cty_`route' == 1
}

tab year_cent, m

* Stack the multi-route county windows saved earlier onto the single-route sample
foreach stub in earlylate-early earlylate-late midlate-mid midlate-late earlymidlate-early earlymidlate-mid earlymidlate-late {
	append using data/train_controls_`stub'-entry.dta
}

tab year_cent, m

// annual dummies
* One indicator per event year: d_year_cent_1..8 for year_cent 1..8 and
* d_year_cent_neg6..neg1 for year_cent -6..-1. There is no dummy for
* year_cent == 0.
cap drop d_year_cent_*
forvalues t = 1/8 {
	gen d_year_cent_`t' = (year_cent == `t')
}
forvalues k = 6(-1)1 {
	gen d_year_cent_neg`k' = (year_cent == -`k')
}

* Event-year interactions with the two firm-level indicators
local periods neg6 neg5 neg4 neg3 neg2 neg1 1 2 3 4 5 6 7 8

cap drop int_cent_y*
foreach p of local periods {
	gen int_cent_y`p' = firm_ever_faster_any * d_year_cent_`p'
}

cap drop int_1_cent_y*
foreach p of local periods {
	gen int_1_cent_y`p' = firm_ever_faster1 * d_year_cent_`p'
}

// bi-annual dummies
* Two-year bins of event time; year_cent == 0 belongs to no bin
gen d_bi_year_cent_neg56 = inlist(year_cent, -6, -5)
gen d_bi_year_cent_neg34 = inlist(year_cent, -4, -3)
gen d_bi_year_cent_neg12 = inlist(year_cent, -2, -1)

gen d_bi_year_cent_pos12 = inlist(year_cent, 1, 2)
gen d_bi_year_cent_pos34 = inlist(year_cent, 3, 4)
gen d_bi_year_cent_pos56 = inlist(year_cent, 5, 6)

* Bin interactions, created in chronological order (neg56 ... pos56)
local bins neg56 neg34 neg12 pos12 pos34 pos56

cap drop int_bi_cent_y*
foreach b of local bins {
	gen int_bi_cent_y`b' = firm_ever_faster_any * d_bi_year_cent_`b'
}

cap drop int_1_bi_cent_y*
foreach b of local bins {
	gen int_1_bi_cent_y`b' = firm_ever_faster1 * d_bi_year_cent_`b'
}

***	Run regressions
* This section defines the estimation sample (touse / sample) and the
* observation weights (weight) used by the outcome regressions that follow.
* Statement order matters: several steps read e(sample) from the regression
* immediately before them.

* Everything from here on works on a temporary copy; the matching -restore-
* is at the end of the script.
preserve

* first_year is the earliest jahr per betnr in the stacked data currently in
* memory (i.e. after the window restrictions), not in the raw panel.
bys betnr: egen first_year = min(jahr)
* Keep establishments first seen before 2002, event years -2 through 4
keep if first_year < 2002 & year_cent >= -2 & year_cent <= 4

count
count if flg_est == 1

* Baseline size: mean of empl_bet over jahr 2000 and 2001, per betnr
gen aux_empl = empl_bet if jahr >= 2000 & jahr < 2002
bys betnr: egen empl_2000 = mean(aux_empl)
drop aux_empl

* Quartile cut-offs of empl_2000, computed over all rows in memory
forvalues q = 25(25)75 {
	qui egen empl_perc_`q' = pctile(empl_2000), p(`q')
}
* perc_empl takes the value 25/50/75/100 for the size quartile; it stays
* missing when empl_2000 is missing. The loop runs from 75 down to 25 so the
* lowest applicable cut-off is the one that sticks.
qui gen perc_empl = 100 if empl_2000 < .
forval q = 75(-25)25 {
	qui replace perc_empl = `q' if empl_2000 <= empl_perc_`q' & empl_perc_`q' != .
}
drop empl_perc*

* Strata = size quartile x calendar year. tmp2 / tmp4 flag strata that contain
* at least one row with firm_ever_faster1 == 0 / == 1 respectively.
egen group_strata = group(perc_empl jahr)
bysort group_strata: egen tmp1 = sum(firm_ever_faster1 == 0)
gen tmp2 = (tmp1 >0 )
bysort group_strata: egen tmp3 = sum(firm_ever_faster1 == 1)
gen tmp4 = (tmp3 >0 )

* Same two checks within hq_kreis. The plain -by- relies on the sort order
* left by the -bys- two lines above it.
bys hq_kreis: egen tmp1b = sum(firm_ever_faster1 == 0)
gen tmp1c = (tmp1b > 0)
by  hq_kreis: egen tmp3b = sum(firm_ever_faster1 == 1)
gen tmp3c = (tmp3b > 0)

* Same two checks within ao_kreis
bys ao_kreis: egen tmp5b = sum(firm_ever_faster1 == 0)
gen tmp5c = (tmp5b > 0)
by  ao_kreis: egen tmp7b = sum(firm_ever_faster1 == 1)
gen tmp7c = (tmp7b > 0)

* Preliminary sample: both firm_ever_faster1 groups are present in the row's
* stratum, hq_kreis and ao_kreis
cap drop aux_touse
gen aux_touse = (tmp2 > 0 & tmp4 > 0 & tmp1c > 0 & tmp3c > 0 & tmp5c > 0 & tmp7c > 0)

* First pass: run the regression only to obtain e(sample), i.e. the rows that
* reghdfe actually keeps under this fixed-effects structure
cap drop sample
reghdfe ln_empl_bet firm_faster1 if aux_touse == 1, absorb(betnr county_year hqcounty_year) vce(robust)
qui gen sample = e(sample)

cap drop weight*

* Repeat the both-groups-present checks, now counting only rows in the
* first-pass estimation sample
bys group_strata: egen tmp7 = sum(firm_ever_faster1 == 0 & sample == 1)
gen tmp8 = (tmp7 > 0)
by  group_strata: egen tmp9 = sum(firm_ever_faster1 == 1 & sample == 1)
gen tmp10 = (tmp9 > 0)

bys hq_kreis: egen tmp11 = sum(firm_ever_faster1 == 0 & sample == 1)
gen tmp12 = (tmp11 > 0)
by  hq_kreis: egen tmp13 = sum(firm_ever_faster1 == 1 & sample == 1)
gen tmp14 = (tmp13 > 0)

bys ao_kreis: egen tmp15 = sum(firm_ever_faster1 == 0 & sample == 1)
gen tmp16 = (tmp15 > 0)
by  ao_kreis: egen tmp17 = sum(firm_ever_faster1 == 1 & sample == 1)
gen tmp18 = (tmp17 > 0)

* NOTE(review): touse uses the stratum and hq_kreis checks only. The ao_kreis
* checks tmp16 / tmp18 computed just above are never used, whereas aux_touse
* did include the ao_kreis checks. Confirm this is intentional before changing
* it; adding them would alter the estimation sample.
* Note also that touse does not itself require aux_touse == 1.
gen touse = (tmp8 > 0 & tmp10 > 0 & tmp12 > 0 & tmp14 > 0)
* Second pass: refresh e(sample) under the refined touse flag
cap drop sample
reghdfe ln_empl_bet firm_faster1 if touse == 1, absorb(betnr county_year hqcounty_year) vce(robust)
qui gen sample = e(sample)

* Per-stratum counts of sample rows with firm_ever_faster1 == 1 (n_treat_s)
* and firm_ever_faster1 == 0 (n_ec_s)
cap drop n_treat_s n_ec_s
bysort group_strata: egen n_treat_s = sum(firm_ever_faster1 == 1 & sample == 1)
bysort group_strata: egen n_ec_s = sum(firm_ever_faster1 == 0 & sample == 1)

* Within-stratum ratio n_treat_s / n_ec_s for firm_ever_faster1 == 0 rows,
* 1 for firm_ever_faster1 == 1 rows.
* NOTE(review): these two lines condition on aux_touse while every other step
* below conditions on touse; rows with touse == 1 but aux_touse == 0 would get
* a missing aux_weight. Confirm whether that is intended.
gen aux_weight = (n_treat_s/n_ec_s) if firm_ever_faster1 == 0 & aux_touse == 1 & sample == 1
replace aux_weight = 1 if firm_ever_faster1 == 1 & aux_touse == 1 & sample == 1

* Overall row counts of the two groups in the final sample
sum firm_ever_faster1 if firm_ever_faster1 == 1 & touse == 1 & sample == 1
local n_treat = `r(N)'

sum firm_ever_faster1 if firm_ever_faster1 == 0 & touse == 1 & sample == 1
local n_ec = `r(N)' 

* Final weights: 1 for firm_ever_faster1 == 1 rows; for firm_ever_faster1 == 0
* rows the stratum ratio is rescaled by n_ec / n_treat
gen weight = 1 if firm_ever_faster1 == 1 & touse == 1 & sample == 1
replace weight = `n_ec'/`n_treat'*aux_weight if firm_ever_faster1 == 0 & touse == 1 & sample == 1

* Diagnostic: number of weighted rows and sum of weights
tabstat weight if touse == 1 & sample == 1, c(s) s(N sum)

* Weighted version of the employment regression, followed by the same
* diagnostic restricted to the rows it actually used
reghdfe ln_empl_bet firm_faster1 if touse == 1 & sample == 1 [aweight=weight], absorb(betnr county_year hqcounty_year) vce(robust)
tabstat weight if e(sample), c(s) s(N sum)

* Clean up helper variables. No test_* variable is created in this file, so
* the -cap drop test_*- only matters if one is inherited from the input data.
drop aux_weight 
cap drop test_*
drop n_treat_s n_ec_s
drop tmp*
* For each outcome: winsorise at the 1st and 99th percentiles (computed over
* all rows currently in memory), then estimate the weighted specification with
* the two-year event-time interactions and store it as full1, full2, ...
local outcomes ln_prdt_bet avg_lnw_bot_bet count_mgmt_bet shr_bloss_w_bet
local j = 1
foreach outcome of local outcomes {

	* Cap the upper tail at the 99th percentile
	quietly summarize `outcome', detail
	quietly replace `outcome' = r(p99) if `outcome' > r(p99) & `outcome' != .
	* Percentiles are recomputed on the top-coded variable before the lower
	* tail is raised to the 1st percentile
	quietly summarize `outcome', detail
	quietly replace `outcome' = r(p1) if `outcome' < r(p1) & `outcome' != .

	eststo full`j', title(`outcome'): ///
		quietly reghdfe `outcome' firm_ever_faster1 int_1_bi_cent_y* if touse == 1 [aweight=weight], absorb(betnr county_year hqcounty_year) vce(cluster ao_kreis)
	* Attach reghdfe's e(K1) and e(K2) to the stored results so the table can
	* report them as count_bet and count_kreis
	quietly estadd scalar count_bet = e(K1): full`j'
	quietly estadd scalar count_kreis = e(K2): full`j'

	local ++j
}

********************************************************************************
***	Table D.5, bottom right
* The table is printed twice with identical options: first with standard
* errors below the coefficients, then with p-values.
foreach aux in se p {
	esttab full1 full2 full3 full4, ///
		b(%9.3f) `aux'(%9.3f) star r2 obslast compress mtitles ///
		star(+ 0.20 ++ 0.10 * 0.05 ** 0.01 *** 0.001) ///
		stats(r2 N count_bet count_kreis, fmt(%9.3f %9.0f %9.0f %9.0f)) ///
		style(tex) label
}

* Distribution of event time among the rows flagged by e(sample)
tab year_cent if e(sample), m
	
* Return to the data as they were before the regression section's -preserve-
restore

log close
